library(gridExtra)
library(ggplot2)
library(dplyr)
# The following plots were created on May 25, 2017, to understand
# linearizing transformations of the independent variable, which are used to
# better fit linear relationships to non-linear data.
# Baseline data ----
# Seed the RNG so every figure in this script is reproducible.
set.seed(123)

# 1000 x values drawn uniformly from [0, 100]; the response is 1.2 * x plus
# Normal(mean = 10, sd = 10) noise.
lin_data <- data.frame(x = runif(1000, 0, 100))
# The noise length is taken from the data frame so it cannot drift out of
# step with the number of rows generated above.
lin_data$y <- rnorm(nrow(lin_data), 10, 10) + 1.2 * lin_data$x

# Centre panel: the linear reference plot placed between the left and right
# panels of each row. Red = loess smooth (span 0.3), blue = straight-line fit.
# `formula = y ~ x` is spelled out so ggplot2 does not print its
# "using formula" message for each smooth layer.
mid <- ggplot(lin_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "y = x")
# Row 1: identity vs. reciprocal ----

# Left panel: identity signal (y = x). Gaussian noise is added with mean equal
# to 25% and sd equal to 12.5% of the largest signal value.
left_data <- data.frame(x = lin_data$x)
left_data$y <- left_data$x # signal definition: change here
# Noise length follows the data frame rather than a hard-coded 1000.
left_data$y <- left_data$y +
  rnorm(nrow(left_data), 0.25 * max(left_data$y), 0.125 * max(left_data$y))
# Red = loess smooth (span 0.3), blue = straight-line fit. The formula is
# explicit so ggplot2 does not print its "using formula" message.
line <- ggplot(left_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.8)) +
  labs(title = "y = x")

# Right panel: reciprocal signal (y = 1 / x) with noise scaled the same way.
# NOTE(review): the noise is scaled by max(y) = 1 / min(x), so the smallest x
# drawn sets the noise level for the whole panel.
right_data <- data.frame(x = lin_data$x)
right_data$y <- 1 / right_data$x # signal definition: change here
right_data$y <- right_data$y +
  rnorm(nrow(right_data), 0.25 * max(right_data$y), 0.125 * max(right_data$y))
recip <- ggplot(right_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "y = 1/(x) \n <---1/(x) Transform----")

# Draw the row: left panel, linear reference, right panel.
grid.arrange(line, mid, recip, ncol = 3)
# Row 2: logarithm vs. exponential ----
# NOTE: the plot objects below are named `log` and `exp`, which shadows the
# base functions for value lookups in the global environment. Calls such as
# log(x) still resolve to the functions, because R skips non-function objects
# when looking up a call. The names are kept because the arrangeGrob() call
# later in this script refers to them.

# Left panel: logarithmic signal (y = log(x)). Gaussian noise is added with
# mean equal to 25% and sd equal to 12.5% of the largest signal value.
left_data <- data.frame(x = lin_data$x)
left_data$y <- log(left_data$x)
# Noise length follows the data frame rather than a hard-coded 1000.
left_data$y <- left_data$y +
  rnorm(nrow(left_data), 0.25 * max(left_data$y), 0.125 * max(left_data$y))
# Red = loess smooth (span 0.3), blue = straight-line fit. The formula is
# explicit so ggplot2 does not print its "using formula" message.
log <- ggplot(left_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.8)) +
  labs(title = "y = log(x) \n ---Log(x) Transform--->")

# Right panel: exponential signal (y = exp(x)) with noise scaled the same way.
# NOTE(review): the noise is scaled by max(exp(x)), so the largest x drawn
# sets the noise level for the whole panel.
right_data <- data.frame(x = lin_data$x)
right_data$y <- exp(right_data$x)
right_data$y <- right_data$y +
  rnorm(nrow(right_data), 0.25 * max(right_data$y), 0.125 * max(right_data$y))
exp <- ggplot(right_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "y = e^x \n <---Exp(x) Transform----")

# Draw the row: left panel, linear reference, right panel.
grid.arrange(log, mid, exp, ncol = 3)
# Row 3: square vs. square root ----
# NOTE: the right-hand plot object is named `sqrt`, which shadows the base
# function for value lookups in the global environment. Calls such as sqrt(x)
# still resolve to the function, because R skips non-function objects when
# looking up a call. The name is kept because the arrangeGrob() call later in
# this script refers to it.

# Left panel: quadratic signal (y = x^2). Gaussian noise is added with mean
# equal to 25% and sd equal to 12.5% of the largest signal value.
left_data <- data.frame(x = lin_data$x)
left_data$y <- left_data$x^2 # signal definition: change here
# Noise length follows the data frame rather than a hard-coded 1000.
left_data$y <- left_data$y +
  rnorm(nrow(left_data), 0.25 * max(left_data$y), 0.125 * max(left_data$y))
# Red = loess smooth (span 0.3), blue = straight-line fit. The formula is
# explicit so ggplot2 does not print its "using formula" message.
square <- ggplot(left_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.8)) +
  labs(title = "y = x^2 \n ---x^2 Transform--->")

# Right panel: square-root signal (y = sqrt(x)) with noise scaled the same way.
right_data <- data.frame(x = lin_data$x)
right_data$y <- sqrt(right_data$x) # signal definition: change here
right_data$y <- right_data$y +
  rnorm(nrow(right_data), 0.25 * max(right_data$y), 0.125 * max(right_data$y))
sqrt <- ggplot(right_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "y = sqrt(x) \n <---sqrt(x) Transform----")

# Draw the row: left panel, linear reference, right panel.
grid.arrange(square, mid, sqrt, ncol = 3)
# Row 4: cube vs. cube root ----

# Left panel: cubic signal (y = x^3). Gaussian noise is added with mean equal
# to 25% and sd equal to 12.5% of the largest signal value.
left_data <- data.frame(x = lin_data$x)
left_data$y <- left_data$x^3 # signal definition: change here
# Noise length follows the data frame rather than a hard-coded 1000.
left_data$y <- left_data$y +
  rnorm(nrow(left_data), 0.25 * max(left_data$y), 0.125 * max(left_data$y))
# Red = loess smooth (span 0.3), blue = straight-line fit. The formula is
# explicit so ggplot2 does not print its "using formula" message.
cube <- ggplot(left_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.8)) +
  labs(title = "y = x^3 \n ---x^3 Transform--->")

# Right panel: cube-root signal (y = x^(1/3)) with noise scaled the same way.
right_data <- data.frame(x = lin_data$x)
right_data$y <- right_data$x^(1/3) # signal definition: change here
right_data$y <- right_data$y +
  rnorm(nrow(right_data), 0.25 * max(right_data$y), 0.125 * max(right_data$y))
cubic_root <- ggplot(right_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "y = x^(1/3) \n <---x^(1/3) Transform----")

# Draw the row: left panel, linear reference, right panel.
grid.arrange(cube, mid, cubic_root, ncol = 3)
# Row 5: sine vs. cosine ----
# NOTE: the plot objects below are named `sin` and `cos`, which shadows the
# base functions for value lookups in the global environment. Calls such as
# sin(x) still resolve to the functions, because R skips non-function objects
# when looking up a call. The names are kept because the arrangeGrob() call
# later in this script refers to them.

# Left panel: sine signal (y = sin(x)). Gaussian noise is added with mean
# equal to 25% and sd equal to 12.5% of the largest signal value.
left_data <- data.frame(x = lin_data$x)
left_data$y <- sin(left_data$x) # signal definition: change here
# Noise length follows the data frame rather than a hard-coded 1000.
left_data$y <- left_data$y +
  rnorm(nrow(left_data), 0.25 * max(left_data$y), 0.125 * max(left_data$y))
# Red = loess smooth (span 0.3), blue = straight-line fit. The formula is
# explicit so ggplot2 does not print its "using formula" message.
sin <- ggplot(left_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  theme(plot.title = element_text(hjust = 0.8)) +
  labs(title = "y = sin(x) \n ---sin(x) Transform--->")

# Right panel: cosine signal (y = cos(x)) with noise scaled the same way.
right_data <- data.frame(x = lin_data$x)
right_data$y <- cos(right_data$x) # signal definition: change here
right_data$y <- right_data$y +
  rnorm(nrow(right_data), 0.25 * max(right_data$y), 0.125 * max(right_data$y))
cos <- ggplot(right_data, aes(x, y)) +
  geom_point() +
  geom_smooth(
    method = "loess", formula = y ~ x, span = 0.3, color = "red"
  ) +
  geom_smooth(method = "lm", formula = y ~ x, color = "blue") +
  labs(title = "y = cos(x) \n <---cos(x) Transform----")

# Draw the row: left panel, linear reference, right panel.
grid.arrange(sin, mid, cos, ncol = 3)
# Combined figure ----
# Build each three-panel row as a grob without drawing it. Every row is laid
# out as: left panel, linear reference (`mid`), right panel.
one <- arrangeGrob(grobs = list(line, mid, recip), ncol = 3)
two <- arrangeGrob(grobs = list(log, mid, exp), ncol = 3)
three <- arrangeGrob(grobs = list(square, mid, sqrt), ncol = 3)
four <- arrangeGrob(grobs = list(cube, mid, cubic_root), ncol = 3)
five <- arrangeGrob(grobs = list(sin, mid, cos), ncol = 3)

# Stack the five rows in a single column and draw the result.
grid.arrange(grobs = list(one, two, three, four, five), ncol = 1)